# -*- coding: utf-8 -*-
'''
Created on Mar 2, 2013

@author: LONG HOANG GIANG
'''

from CrawlerLib2 import commonlib, html2text
from Model.storymodelv3 import StoryModel
import os
import re
import datetime
from urlparse import urljoin


class TuSachMobi(object):
    '''Crawl one story from tusach.mobi into a StoryModel database.

    An instance is bound to the URL of a story's index page and to the name
    of the database the chapters are written to. Call process() to run the
    whole crawl.
    '''

    # Class-level defaults; __init__ replaces both on every instance.
    __url = ''
    # Path template for the database; {0} is filled with the database name.
    __database = '/longhoanggiang/database/{0}'

    def __init__(self, url, database):
        '''Remember the index page URL and build the database path.

        url      -- address of the page that lists the story's chapters.
        database -- name substituted into the class-level path template.
        '''
        self.__url = url
        # The instance has no attribute of its own yet, so this lookup reads
        # the class-level template and stores the formatted path on self.
        self.__database = self.__database.format(database)

    def getDetail(self, item):
        '''Download one chapter page and return it as an HTML fragment.

        item is a dict with 'url' and 'title' keys, as built by getListCat().
        The first <div id="content"> of the page is converted with html2text,
        UTF-8 encoded and stripped; its newlines become <br /> and the result
        is headed by the title (passed through commonlib.toUpper) in bold.

        The [0] lookup fails when the page has no <div id="content">
        (an IndexError, assuming xpath() returns a list).
        '''
        tree = commonlib.loadweb(item['url']).build_tree()
        contentNode = tree.xpath("//div[@id='content']")[0]
        markup = commonlib.Etree.tostring(contentNode)
        html = html2text.html2text(markup).encode('utf-8').strip()
        # Progress/debug output: echo the converted chapter text.
        print(html)
        # A literal newline needs no regular expression.
        html = html.replace("\n", "<br />")
        heading = commonlib.toUpper(item['title'])
        return '''<b>{0}</b><br /><hr /><br />{1}'''.format(heading, html)

    def getListCat(self):
        '''Return the chapter links found on the story's index page.

        Every <a> below a <div class="cateItem"> yields one
        {'title': ..., 'url': ...} dict, in document order. Anchors whose
        href is missing or blank are skipped; the remaining hrefs are
        resolved against http://tusach.mobi/.
        '''
        tree = commonlib.loadweb(self.__url).build_tree()
        data = []
        for node in tree.xpath("//div[@class='cateItem']//a"):
            href = node.get('href', '').strip()
            if not href:
                continue
            title = commonlib.stringify(node)
            href = urljoin('http://tusach.mobi/', href)
            # Progress/debug output: one line per chapter link.
            print('%s %s' % (title, href))
            data.append({'title': title, 'url': href})
        return data

    def process(self):
        '''Crawl every chapter and store it in the database.

        Opens the StoryModel, fetches the chapter list, then adds each
        chapter with its title passed through commonlib.toUpper. The model
        is closed even when a download or insert raises, so a failed crawl
        does not leave the database open; the exception still propagates.
        '''
        model = StoryModel(self.__database)
        model.open(True)
        try:
            for chapter in self.getListCat():
                detail = self.getDetail(chapter)
                title = commonlib.toUpper(chapter['title'])
                model.add_story(title, detail, 0)
        finally:
            model.close()


if __name__ == '__main__':
    import sys

    # To crawl a single story: TuSachMobi(index_url, database_name).process()

    # Each entry is (story index URL, database name).
    stories = [
        ('http://tusach.mobi/15.suy-ngam-lam-nguoi/9666.99-khoanh-khac-doi-nguoi-zhang-zi-wen.htm', '99khoanhkhacdn'),
        ('http://tusach.mobi/23.tieu-thuyet/10591.bong-bong-mua-he-minh-hieu-khe.htm', 'bongbongmuahe'),
        ('http://tusach.mobi/24.ton-giao-chinh-tri/10832.bai-hoc-kinh-thanh-khong-ro.htm', 'baihockt'),
        ('http://tusach.mobi/24.ton-giao-chinh-tri/10843.chet-va-tai-sinh-thich-nguyen-tang.htm', 'chetvataisinh'),
        ('http://tusach.mobi/22.van-hoc-co-dien-nuoc-ngoai/10556.ly-tri-va-tinh-cam-jane-austen.htm', 'litrivatinhcam'),
    ]

    for url, database in stories:
        TuSachMobi(url, database).process()

    print('> Finished at {0}'.format(datetime.datetime.now()))
    # os._exit() terminates without flushing stdio buffers, so flush by hand;
    # otherwise the message above (and the tail of the crawl output) can be
    # lost when stdout is redirected to a file or pipe.
    sys.stdout.flush()
    # Reaching this line means every story was processed without an
    # exception, so report success (0) rather than a failure status.
    # NOTE(review): the hard exit is presumably there to stop lingering
    # crawler threads from keeping the process alive -- confirm; if not
    # needed, sys.exit(0) is preferable.
    os._exit(0)
    
    
        